################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script prepares the participation analysis. 
#
# Additionally, it produces the following graph:
# Figure A1
#
# The CMP dataset cannot be copied to the Dataverse (due to terms of use) and is therefore
# not included in the replication data. In order to recreate the data, please
# go to https://wratil.eu/data and run the script provided there to create the file 
# 'parties_cmp2019_online_2.2.dta' and include it in the data folder.
# Unfortunately, this means that this script will not run without this step.
# However, you can just skip this script and go directly to Script #8 and
# work with our pre-loaded dataset.
################################################################################-

library(tidyverse) #version 1.3.2
library(quanteda)  #version 3.2.1
library(readxl)    #version 1.4.0

# ---- Build the actor x meeting grid of potential speeches ----
# Loads the prepared corpus objects; `council_all_nopres` is expected to be
# among the objects stored in this .RData file.
load(file = "generated_data/corpus_final.RData")

# Document-level variables: one row per speech in the corpus.
sum.corp <- docvars(council_all_nopres)

# Cross every actor with every transcription. expand.grid() names the columns
# Var1 (actor) and Var2 (transcription) and returns them as factors.
possible_speeches <- data.frame(
  expand.grid(unique(sum.corp$Actor), unique(sum.corp$Transcription)),
  stringsAsFactors = FALSE
)

# Composite key "<actor>_<transcription>", built identically on both sides.
possible_speeches$actor_transcription <- paste(possible_speeches$Var1,
                                               possible_speeches$Var2,
                                               sep = "_")
sum.corp$actor_transcription <- paste(sum.corp$Actor,
                                      sum.corp$Transcription,
                                      sep = "_")

# spoke = 1 if the actor has at least one speech in that transcription, else 0.
possible_speeches$spoke <- ifelse(
  possible_speeches$actor_transcription %in% sum.corp$actor_transcription,
  1, 0
)
table(possible_speeches$spoke, possible_speeches$Var1)

possible_speeches <- possible_speeches %>%
  dplyr::rename("Actor" = "Var1",
                "Transcription" = "Var2")
names(sum.corp)

# Meeting-level metadata. De-duplicating BEFORE the join avoids multiplying
# every actor row by the number of speeches held in that meeting; the final
# result is the same set of rows in the same order.
meeting_info <- sum.corp %>%
  select(Transcription, unanimity_any, budget_any,
         date_correct, year, Council_Config_final) %>%
  distinct()

# Transcription is the only column the two tables share, so it is spelled out
# as the join key instead of relying on the implicit natural join.
possible_speeches <- possible_speeches %>%
  left_join(meeting_info, by = "Transcription") %>%
  distinct()

# ---- Load external covariate data ----

# Eurobarometer data; the nation code "bu" is rewritten to "bg" so that it
# lines up with the actor codes used in the joins further below.
eurobarometer <- readstata13::read.dta13("data/eb_responsiveness_apsr.dta")
eurobarometer$nation <- recode(eurobarometer$nation, "bu" = "bg")

# Manifesto Project (CMP) government positions.
cmp_positions <- readstata13::read.dta13("data/parties_cmp2019_online_2.2.dta")

# Two-letter actor codes, ordered so that element i belongs to numeric country
# code i (1 = BE, ..., 28 = UK). Codes outside 1-28 and missing codes yield NA.
cmp_actor_codes <- c(
  "BE", "BG", "CZ", "DK", "DE", "EE", "IE",
  "EL", "ES", "FR", "HR", "IT", "CY", "LV",
  "LT", "LU", "HU", "MT", "NL", "AT", "PL",
  "PT", "RO", "SI", "SK", "FI", "SE", "UK"
)
cmp_positions$actor <- cmp_actor_codes[
  match(cmp_positions[["country"]], seq_along(cmp_actor_codes))
]

# Country-year covariates from spreadsheets.
eu_budget <- read_xlsx("data/eu_budget_contributions_2016.xlsx")
inflation <- read_xls("data/eurostat_inflation_hicp_2019.xls",
                      sheet = "Ready to load")
unemployment <- read_xls("data/eurostat_unemployment_rate_2019.xls",
                         sheet = "Ready to load")

# All joins in this section use a pasted composite key of the form
# "<ACTOR>_<date>" or "<ACTOR>_<year>"; this helper builds it the same way
# everywhere.
make_join_key <- function(unit, period) {
  paste(unit, period, sep = "_")
}

#-------------------------------
# Join in CMP positions (key: actor + exact date)
possible_speeches$actor_date <- make_join_key(possible_speeches$Actor,
                                              possible_speeches$date_correct)
cmp_positions$actor_date <- make_join_key(cmp_positions$actor,
                                          cmp_positions$date)
possible_speeches <- possible_speeches %>%
  left_join(cmp_positions %>% select(-c("country", "actor", "date")),
            by = "actor_date")

# Label governments by the sign of gov_eu_cmp_static; missing values stay NA.
possible_speeches$gov_eu_supporter <- ifelse(
  possible_speeches$gov_eu_cmp_static > 0,
  "Europhile Government",
  "Eurosceptic Government"
)

#-------------------------------
# Join in Eurobarometer data (key: upper-cased nation code + start date)
eurobarometer$actor_date <- make_join_key(toupper(eurobarometer$nation),
                                          eurobarometer$start_date)
possible_speeches <- possible_speeches %>%
  left_join(
    eurobarometer %>% select(-c("nation", "country", "start_date", "year")),
    by = "actor_date"
  )

#-------------------------------
# Join in Unemployment data (key: actor + year)
possible_speeches$actor_year <- make_join_key(possible_speeches$Actor,
                                              possible_speeches$year)
# The first spreadsheet column holds the country code; give it a fixed name.
names(unemployment)[1] <- "government"
unemployment$government <- toupper(unemployment$government)
unemployment$actor_year <- make_join_key(unemployment$government,
                                         unemployment$year)
possible_speeches <- possible_speeches %>%
  left_join(unemployment %>% select(-c("government", "year")),
            by = "actor_year")

#-------------------------------
# Join in Inflation data (key: actor + year)
names(inflation)[1] <- "government"
inflation$government <- toupper(inflation$government)
inflation$actor_year <- make_join_key(inflation$government,
                                      inflation$year)
possible_speeches <- possible_speeches %>%
  left_join(inflation %>% select(-c("government", "year")),
            by = "actor_year")

#-------------------------------
# Join in EU budget data (key: actor + year)
# Here the first spreadsheet column is the year.
names(eu_budget)[1] <- "year"
eu_budget$government <- toupper(eu_budget$government)
eu_budget$actor_year <- make_join_key(eu_budget$government, eu_budget$year)
possible_speeches <- possible_speeches %>%
  left_join(eu_budget %>% select(-c("government", "year")),
            by = "actor_year")

# ---- Drop actor-meeting pairs that are not counted as speech opportunities ----
# (1) HR for every meeting before 2013-07-01.
# (2) Each actor listed below for every meeting inside its own six-month term
#     (presumably the rotating Council presidency -- confirm against the paper).
#
# A term is the half-open interval [first_day, next_term): the first day of the
# half-year is INCLUDED and the first day of the following half-year belongs to
# the next holder. The earlier strict ">" on the start date left a meeting held
# exactly on 1 January or 1 July unfiltered for both neighbouring holders.
presidency_terms <- data.frame(
  holder = c(
    "ES", # Spain: January-June 2010
    "BE", # Belgium: July-December 2010
    "HU", # Hungary: January-June 2011
    "PL", # Poland: July-December 2011
    "DK", # Denmark: January-June 2012
    "CY", # Cyprus: July-December 2012
    "IE", # Ireland: January-June 2013
    "LT", # Lithuania: July-December 2013
    "EL", # Greece: January-June 2014
    "IT", # Italy: July-December 2014
    "LV", # Latvia: January-June 2015
    "LU", # Luxembourg: July-December 2015
    "NL", # Netherlands: January-June 2016
    "SK"  # Slovakia: July-December 2016
  ),
  first_day = c(
    "2010-01-01", "2010-07-01", "2011-01-01", "2011-07-01",
    "2012-01-01", "2012-07-01", "2013-01-01", "2013-07-01",
    "2014-01-01", "2014-07-01", "2015-01-01", "2015-07-01",
    "2016-01-01", "2016-07-01"
  ),
  next_term = c(
    "2010-07-01", "2011-01-01", "2011-07-01", "2012-01-01",
    "2012-07-01", "2013-01-01", "2013-07-01", "2014-01-01",
    "2014-07-01", "2015-01-01", "2015-07-01", "2016-01-01",
    "2016-07-01", "2017-01-01"
  ),
  stringsAsFactors = FALSE
)

# One logical mask per term, OR-ed together. A row whose test evaluates to NA
# (missing date for a listed holder) stays NA here and is therefore dropped by
# filter(), exactly as the chained filters did.
in_own_term <- Reduce(
  `|`,
  Map(
    function(holder, first_day, next_term) {
      possible_speeches$Actor == holder &
        possible_speeches$date_correct >= first_day &
        possible_speeches$date_correct < next_term
    },
    presidency_terms$holder,
    presidency_terms$first_day,
    presidency_terms$next_term
  )
)

possible_speeches <- possible_speeches %>%
  filter(
    !(date_correct < "2013-07-01" & Actor == "HR"),
    !in_own_term
  )

# ---- Year attached to each actor (column `accession_year`) ----
# Actors listed here get the stated year; every other actor (including a
# missing actor code) falls back to 2004.
accession_by_actor <- c(
  BE = 1957, FR = 1957, IT = 1957, LU = 1957, NL = 1957, DE = 1957,
  DK = 1973, IE = 1973, UK = 1973,
  EL = 1981,
  PT = 1986, ES = 1986,
  AT = 1995, FI = 1995, SE = 1995,
  BG = 2007, RO = 2007,
  HR = 2013
)

# Look up by actor code; as.character() makes the lookup work by label whether
# Actor is stored as a factor or as a character vector.
accession_lookup <- unname(
  accession_by_actor[as.character(possible_speeches$Actor)]
)
accession_lookup[is.na(accession_lookup)] <- 2004
possible_speeches$accession_year <- accession_lookup

# ---- Date on which each prime minister's cabinet series starts ----
# Strip the cabinet numeral (I ... VI) from the cabinet name to obtain a
# prime-minister name shared by consecutive cabinets.
# NOTE(review): the pattern is not anchored to the end of the string, so a
# space followed by a capital "I" or "V" anywhere in a name is removed as well.
# The same pattern is applied to both tables, so the join key stays consistent.
cabinet_numeral_pattern <- " I| II| III| IV| V| VI"
cmp_positions$pm_name <- gsub(cabinet_numeral_pattern, "",
                              cmp_positions$cabinet_name)
possible_speeches$pm_name <- gsub(cabinet_numeral_pattern, "",
                                  possible_speeches$cabinet_name)

# Earliest CMP date observed for each prime-minister name.
start_dates_cabinets <- summarise(
  group_by(cmp_positions, pm_name),
  entry_date = min(date)
)

# No `by` is given, so the join uses every column the two tables share
# (pm_name, plus entry_date should possible_speeches already carry one).
possible_speeches <- left_join(possible_speeches, start_dates_cabinets)

# Manual overrides: for these names, meetings after `after` get the later
# `entry` date instead of the earliest date computed above (presumably a
# non-consecutive return to office -- confirm). All other rows keep their value.
entry_overrides <- data.frame(
  pm = c("Borisov", "Rasmussen L", "Fico", "Orban", "Jansa"),
  after = as.Date(c("2014-11-06", "2015-06-17", "2012-04-03",
                    "2010-05-28", "2012-01-27")),
  entry = as.Date(c("2014-11-07", "2015-06-18", "2012-04-04",
                    "2010-05-29", "2012-01-28")),
  stringsAsFactors = FALSE
)
for (k in seq_len(nrow(entry_overrides))) {
  # which() drops rows where the test is NA (missing name or date).
  override_rows <- which(
    possible_speeches$pm_name == entry_overrides$pm[k] &
      possible_speeches$date_correct > entry_overrides$after[k]
  )
  possible_speeches$entry_date[override_rows] <- entry_overrides$entry[k]
}

#-------------------------------
# Join in distance to election

elections <- readstata13::read.dta13("data/electoral_proximity2020.dta")

# Two-letter actor codes, ordered so that element i belongs to numeric country
# code i (1 = BE, ..., 28 = UK). Codes outside 1-28 and missing codes yield NA.
election_actor_codes <- c(
  "BE", "BG", "CZ", "DK", "DE", "EE", "IE",
  "EL", "ES", "FR", "HR", "IT", "CY", "LV",
  "LT", "LU", "HU", "MT", "NL", "AT", "PL",
  "PT", "RO", "SI", "SK", "FI", "SE", "UK"
)
elections$actor <- election_actor_codes[
  match(elections[["country"]], seq_along(election_actor_codes))
]

# Rebuild the "<ACTOR>_<date>" key on both sides and merge.
possible_speeches$actor_date <- paste(possible_speeches$Actor,
                                      possible_speeches$date_correct,
                                      sep = "_")
elections$actor_date <- paste(toupper(elections$actor),
                              elections$date,
                              sep = "_")
possible_speeches <- possible_speeches %>%
  left_join(elections %>% select(-c("actor", "nation", "country")),
            by = "actor_date")

# Flag meetings with distance_elect below 62; the "More Than Two Months"
# category is moved to the front so that it becomes the reference level.
possible_speeches$final_two_months <- fct_relevel(
  as.factor(
    ifelse(possible_speeches$distance_elect < 62,
           "Last Two Months Before Election",
           "More Than Two Months Before Election")
  ),
  "More Than Two Months Before Election"
)

# ---- Standardise the continuous covariates ----
# scale() returns a one-column matrix carrying centring/scaling attributes;
# as.numeric() reduces it to a plain numeric vector. distance_elect_planned was
# previously stored as the raw matrix, unlike every other covariate here.
standardise <- function(x) {
  as.numeric(scale(x))
}

# Days between the meeting date and the cabinet's entry date.
possible_speeches$days_in_office <- as.numeric(
  possible_speeches$date_correct - possible_speeches$entry_date
)

possible_speeches$distance_elect_planned <- standardise(possible_speeches$distance_elect_planned)
possible_speeches$accession_year <- standardise(possible_speeches$accession_year)
possible_speeches$days_in_office <- standardise(possible_speeches$days_in_office)
possible_speeches$image_lag6m <- standardise(possible_speeches$image_lag6m)
possible_speeches$gov_lr_cmp_static <- standardise(possible_speeches$gov_lr_cmp_static)
possible_speeches$eu_cont_gdp <- standardise(possible_speeches$eu_cont_gdp)
possible_speeches$unemployment <- standardise(possible_speeches$unemployment)
possible_speeches$inflation <- standardise(possible_speeches$inflation)

# Mark the standardised columns with a `_scaled` suffix.
# NOTE(review): eu_cont_gdp is renamed to eu_receipts_gdp_scaled -- confirm that
# the contributions/receipts wording is intended.
possible_speeches <- possible_speeches %>%
  dplyr::rename(distance_elect_planned_scaled = distance_elect_planned,
                accession_year_scaled = accession_year,
                days_in_office_scaled = days_in_office,
                image_lag6m_scaled = image_lag6m,
                gov_lr_cmp_static_scaled = gov_lr_cmp_static,
                eu_receipts_gdp_scaled = eu_cont_gdp,
                unemployment_scaled = unemployment,
                inflation_scaled = inflation)

# ---- Regional and size groupings of actors ----
southern_actors <- c("BG", "RO", "IT", "EL", "ES", "PT", "MT", "CY")
northern_actors <- c("FI", "SE", "DK", "LT", "EE", "LV", "UK", "NL", "IE")
large_actors <- c("DE", "FR", "IT", "UK")

# The two lists do not overlap; any actor in neither list is "Central".
possible_speeches$north_south <- case_when(
  possible_speeches$Actor %in% northern_actors ~ "North",
  possible_speeches$Actor %in% southern_actors ~ "South",
  TRUE ~ "Central"
)

# DE, FR, IT and UK are "Large"; every other actor is "Small".
possible_speeches$large_small <- if_else(
  possible_speeches$Actor %in% large_actors,
  "Large",
  "Small"
)

# Quick look at the standardised time-in-office variable. The column was
# renamed to days_in_office_scaled above, so the exact name is used here; the
# old name only resolved through `$` partial matching on a data.frame.
summary(possible_speeches$days_in_office_scaled)

# Share of speech opportunities taken per actor: column-wise proportions of
# spoke (0/1) by Actor, keeping only the spoke == 1 row, sorted ascending.
tab.data <- as.data.frame(
  prop.table(table(possible_speeches$spoke, possible_speeches$Actor),
             margin = 2)
) %>%
  filter(Var1 == 1) %>%
  arrange(Freq)

# This is Figure A1
# Horizontal bar chart with actors ordered by their share. geom_col() is the
# direct equivalent of geom_bar(stat = "identity").
p <- ggplot(tab.data, aes(reorder(Var2, Freq), Freq)) +
  geom_col(position = "dodge") +
  labs(x = "", y = "Share of Speech Opportunities Taken") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  coord_flip()
ggsave("figures_appendix/figure_a_1.eps", plot = p,
       width = 6, height = 4, units = "in")

#pdf("", width = 12, height = 6) # Open a new pdf file
#p
#dev.off()  

# Missing-value check: for every column, tabulate how many entries are NA
# (TRUE) versus observed (FALSE). lapply() works column by column and always
# returns a list, whereas apply() would first coerce the whole data frame to a
# character matrix.
lapply(possible_speeches, function(column) table(is.na(column)))

# Store the finished participation dataset for the analysis scripts.
save(possible_speeches, file = "generated_data/participation_dataset.RData")
